#####
# Replication for: "Can Political Speech Foster Tolerance of Immigrants?" by Schleiter, Tavits, and Ward.
# Table S.13
#####

library(here)
library(data.table)
library(xtable)

# source the custom functions
source("functions.R")

# load the pooled data if not already in workspace
if(!exists("pooled")){
  pooled <- fread(here("data", "pooled.csv"))
}

# coerce the heterogeneity measure to a factor if not already
if(class(pooled$pid2_het) != "factor"){
  pooled[ , pid2_het := factor(pid2_het, levels = c("Independent", "Democrat", "Republican"))]
}


# Helper functions --------------------------------------------------------

# These three functions do the starch of the analysis. The first fits all 9 models, the second extracts the 9 marginal effects per model. The third does the battery of p-values
allCovPIDfits <- 
  function(y, samp, data = pooled, chatty = T){
    if(samp != "Replication"){
      form <- as.formula(paste0(y, "~ CH + NM + CS + CH*pid2_het + NM*pid2_het + CS*pid2_het + female + age + I(age^2) + race2 + edu2 + news2 + region2 + factor(wave)"))
    } else {
      form <- as.formula(paste0(y, "~ CH + NM + CS + CH*pid2_het + NM*pid2_het + CS*pid2_het + female + age + I(age^2) + race2 + edu2 + news2 + region2"))
    }
    
    sample_use <- c("Main" = "replication == 0", "Replication" = "replication == 1", "Pooled" = "")[samp]
    
    if(samp != "Pooled"){
      mod <- lm(formula = form, data = data[eval(parse(text = sample_use))])
    } else {
      mod <- lm(formula = form, data = data)
    }
    
    if(chatty) print(summary(mod))
    return(mod)
  }

PID_ME_extract <- 
  function(mod, level = .95){
    out <- expand.grid("treatment" = c("CH", "NM", "CS"), "PID" = c("I", "D", "R"), stringsAsFactors = F); setDT(out)
    
    out[PID == "I" , c("est", "lower", "upper") := .(
      coef(mod)[c("CH", "NM", "CS")],
      confint(mod, level = level)[c("CH", "NM", "CS"), 1],
      confint(mod, level = level)[c("CH", "NM", "CS"), 2]
    )]
    
    for(n in 4:9){
      vars <- c(out$treatment[n], paste0(out$treatment[n], ":pid2_het", ifelse(out$PID[n] == "D", "Democrat", "Republican")))
      est <- sum(coef(mod)[vars])
      se <- sqrt(vcov(mod)[vars[1], vars[1]] + vcov(mod)[vars[2], vars[2]] + 2*vcov(mod)[vars[1], vars[2]])
      set(out, i = n, j = "est", value = est)
      set(out, i = n, j = "lower", value = est - 1.96*se)
      set(out, i = n, j = "upper", value = est + 1.96*se)
    }
    
    return(out)
  }

# p-values that I want:
# for each one, compare that to 0 (this is easy for I, hard for others)
# For each, compare I to D (just the signif. of the interaction)
# for each, compare I to R (just the signif. of the interaction)
# For each, compare D to R (test of significant differences between the interaction terms)

PID_pval_extract <-
  function(mod){
    out <- expand.grid("treatment" = c("CH", "NM", "CS"), "test" = c("I_v_0", "D_v_0", "R_v_0", "I_v_D", "I_v_R", "D_v_R"), stringsAsFactors = F); setDT(out)
    
    # knock out the ones that aren't marginal effects
    out[test == "I_v_0", p := summary(mod)$coefficients[treatment, 4]]
    out[test == "I_v_D", p := summary(mod)$coefficients[paste0(treatment, ":pid2_hetDemocrat"), 4]]
    out[test == "I_v_R", p := summary(mod)$coefficients[paste0(treatment, ":pid2_hetRepublican"), 4]]
    
    # D_v_0 and R_v_0 are easy enough
    for(n in which(out$test %in% c("D_v_0", "R_v_0"))){
      vars <- c(out$treatment[n], paste0(out$treatment[n], ":pid2_het", ifelse(out$test[n] == "D_v_0", "Democrat", "Republican")))
      est <- sum(coef(mod)[vars])
      se <- sqrt(vcov(mod)[vars[1], vars[1]] + vcov(mod)[vars[2], vars[2]] + 2*vcov(mod)[vars[1], vars[2]])
      set(out, i = n, j = "p", value = 2*(1-pnorm(abs(est/se))))
    }
    
    # D_v_R is test of significant difference between the interaction terms
    for(n in which(out$test %in% "D_v_R")){
      vars <- paste0(out$treatment[n], c(":pid2_hetDemocrat", ":pid2_hetRepublican"))
      top <- coef(mod)[vars[1]] - coef(mod)[vars[2]]
      se <- sqrt(vcov(mod)[vars[1], vars[1]] + vcov(mod)[vars[2], vars[2]] - 2*vcov(mod)[vars[1], vars[2]])
      set(out, i = n, j = "p", value = 2*(1 - pnorm(abs(top/se))))
    }
    
    return(out)
  }


# Fitting -----------------------------------------------------------------

pid_fits <- expand.grid("y" = c("immPCA", "imm_neighbors2", "imm_increase2"), "samp" = c("Main", "Replication", "Pooled")); setDT(pid_fits)

pid_fits[ , fit := Map(allCovPIDfits, y, samp, chatty = F)]
pid_me<- pid_fits[ , PID_ME_extract(mod = fit[[1]]), by = .(y, samp)]
pid_p <- pid_fits[ , PID_pval_extract(fit[[1]]), by = .(y, samp)]

# Reporting ---------------------------------------------------------------

p_print <- dcast(pid_p, samp + y + treatment ~ test, value.var = "p")

p_print[ , y := fcase(
  y == "immPCA", "Index",
  y == "imm_neighbors2", "Neighbors",
  y == "imm_increase2", "Increase"
)]

p_print[ , treatment := fcase(
  treatment == "NM", "Norms",
  treatment == "CH", "Common Humanity",
  treatment == "CS", "Countering Stereotypes"
)]

# much of the formatting on this table is done in tex.
print(
  xtable(
    p_print[ , .(y, treatment, D_v_0, I_v_0, R_v_0, I_v_D, D_v_R, I_v_R)],
    caption = "P-values from Party ID Heterogeneity Analysis",
    align = rep(c("l", "c"), times = c(3, 6))
  ), 
  include.rownames = FALSE,
  booktabs = T
)

